/*
 * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdint.h>
#include <sys/types.h>
#include <sys/resource.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <sys/_time.h>
#endif
#include <sys/queue.h>
#include <sys/epoll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif

#include <event.h>
#include <event-internal.h>
#include <evsignal.h>
#include <log.h>

/* Due to limitations in the epoll interface, we need to keep track of
 * all file descriptors ourselves.
 */

/*
 * Per-descriptor bookkeeping: the events currently registered on one
 * file descriptor.  A pointer is NULL while no event of that kind is
 * registered.
 */
struct evepoll {
  struct event *evread;		/* event registered for EV_READ, or NULL */
  struct event *evwrite;	/* event registered for EV_WRITE, or NULL */
};

/*
 * State of one epoll backend instance, created by epoll_init() and
 * released by epoll_dealloc().
 */
struct epollop {
  struct evepoll *fds;		/* table indexed by file descriptor */
  int nfds;			/* number of entries allocated in fds */

  struct epoll_event *events;	/* result buffer handed to epoll_wait() */
  int nevents;			/* capacity of the events buffer */
  int epfd;			/* descriptor returned by epoll_create() */
};

/* Entry points of the epoll backend; they are published via epollops. */
void *epoll_init (struct event_base *base);
int epoll_add (void *arg, struct event *ev);
int epoll_del (void *arg, struct event *ev);
int epoll_dispatch (struct event_base *base, void *arg, struct timeval *tv);
void epoll_dealloc (struct event_base *base, void *arg);

/*
 * Operations table that publishes this backend under the name "epoll".
 * The initializer is positional, so the entries must stay in the order
 * in which struct eventop (declared outside this file) lists its
 * members.
 */
struct eventop epollops =
  {
    "epoll",
    epoll_init,
    epoll_add,
    epoll_del,
    epoll_dispatch,
    epoll_dealloc,
    1 /* need reinit */
  };

/*
 * FD_CLOSEONEXEC(fd): request close-on-exec for a descriptor by calling
 * fcntl(F_SETFD) with the flag value 1 (FD_CLOEXEC on Linux).  A failure
 * is only reported through event_warn().  When HAVE_SETFD is not defined
 * the macro expands to nothing.
 */
#if !defined(HAVE_SETFD)
#define FD_CLOSEONEXEC(fd)
#else
#define FD_CLOSEONEXEC(fd) \
  do \
    { \
      if (fcntl(fd, F_SETFD, 1) == -1) \
        event_warn("fcntl(%d, F_SETFD)", fd); \
    } \
  while (0)
#endif

/*
 * Default table size used by epoll_init() (epoll_create() hint, event
 * buffer and fd table) when RLIMIT_NOFILE cannot be read or is
 * unlimited.
 */
#define NEVENT	32000

/*
 * Create the epoll backend state for an event base.
 *
 * The tables are sized from the soft RLIMIT_NOFILE limit minus one, or
 * from NEVENT when the limit cannot be read or is unlimited.
 *
 * Returns the new struct epollop (as void *), or NULL when the backend
 * is disabled through the EVENT_NOEPOLL environment variable, when
 * epoll_create() fails, or when memory cannot be allocated.  On every
 * failure path the epoll descriptor and any partial allocation are
 * released again.
 */
void *
epoll_init (struct event_base *base)
{
  int epfd, nfiles = NEVENT;
  struct rlimit rl;
  struct epollop *epollop;

  /* Disable epoll when this environment variable is set */
  if (getenv ("EVENT_NOEPOLL") )
    return (NULL);

  if (getrlimit (RLIMIT_NOFILE, &rl) == 0 &&
          rl.rlim_cur != RLIM_INFINITY)
    {
      /*
       * Deliberately rlim_cur - 1 and not rlim_cur itself: the
       * original authors warn that passing rlim_cur here breaks on
       * Solaris.
       */
      nfiles = rl.rlim_cur - 1;
    }

  /* Initialize the kernel queue */
  if ( (epfd = epoll_create (nfiles) ) == -1)
    {
      event_warn ("epoll_create");
      return (NULL);
    }

  FD_CLOSEONEXEC (epfd);

  epollop = calloc (1, sizeof (*epollop) );

  if (epollop == NULL)
    goto fail;

  epollop->epfd = epfd;

  /*
   * calloc() rather than malloc(n * size): it rejects a count whose
   * byte size would overflow instead of silently allocating too little.
   */
  epollop->events = calloc (nfiles, sizeof (struct epoll_event) );

  if (epollop->events == NULL)
    goto fail;

  epollop->nevents = nfiles;

  epollop->fds = calloc (nfiles, sizeof (struct evepoll) );

  if (epollop->fds == NULL)
    goto fail;

  epollop->nfds = nfiles;

  evsignal_init (base);

  return (epollop);

fail:
  /* epollop->fds is always NULL here; events may or may not be set. */
  if (epollop != NULL)
    {
      free (epollop->events);
      free (epollop);
    }

  /* Do not leak the epoll descriptor when setup fails. */
  close (epfd);

  return (NULL);
}

/*
 * Make sure the per-descriptor table can be indexed with `max`.
 *
 * base - unused.
 * arg  - the struct epollop of this backend.
 * max  - highest descriptor number that must be a valid index of
 *        epollop->fds.
 *
 * The table is grown by doubling until it holds more than `max`
 * entries; new entries are zeroed.  Returns 0 on success (including
 * when no growth is needed) and -1 when realloc() fails, in which case
 * the existing table is left untouched.
 */
static int
epoll_recalc (struct event_base *base, void *arg, int max)
{
  struct epollop *epollop = arg;
  struct evepoll *fds;
  int nfds;

  /* unused parameter */
  (void) base;

  /* Index `max` is valid only while max < nfds, so max == nfds must grow. */
  if (max < epollop->nfds)
    return (0);

  nfds = epollop->nfds;

  /* An empty table would never grow by doubling. */
  if (nfds < 1)
    nfds = 1;

  while (nfds <= max)
    nfds <<= 1;

  fds = realloc (epollop->fds, nfds * sizeof (struct evepoll) );

  if (fds == NULL)
    {
      event_warn ("realloc");
      return (-1);
    }

  epollop->fds = fds;

  /* Clear only the newly added tail of the table. */
  memset (fds + epollop->nfds, 0,
          (nfds - epollop->nfds) * sizeof (struct evepoll) );
  epollop->nfds = nfds;

  return (0);
}

/*
 * Wait for I/O readiness and activate the matching events.
 *
 * base - event base; used for signal processing.
 * arg  - the struct epollop of this backend.
 * tv   - maximum time to wait, or NULL to wait without a timeout.
 *
 * The wait is capped at 35 minutes, so the function may return before
 * `tv` has elapsed with nothing activated.  Callers already have to
 * cope with that, because an interrupted wait (EINTR) also returns 0
 * early.
 *
 * Returns 0 on success or after EINTR, and -1 when epoll_wait() fails
 * for any other reason.
 */
int
epoll_dispatch (struct event_base *base, void *arg, struct timeval *tv)
{
  /*
   * Linux kernels before 2.6.37 treat a timeout larger than about
   * LONG_MAX / HZ milliseconds as infinite (roughly 35.79 minutes with
   * a 32-bit long and HZ=1000).  The cap also keeps the millisecond
   * value inside the range of int.
   */
  const long max_timeout_msec = 35L * 60L * 1000L;

  struct epollop *epollop = arg;
  struct epoll_event *events = epollop->events;
  int i, res, timeout = -1;

  if (tv != NULL)
    {
      if (tv->tv_sec > max_timeout_msec / 1000)
        {
          timeout = (int) max_timeout_msec;
        }
      else
        {
          /* Round the microseconds up to whole milliseconds. */
          long msec = (long) tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;

          if (msec > max_timeout_msec)
            msec = max_timeout_msec;

          timeout = (int) msec;
        }
    }

  res = epoll_wait (epollop->epfd, events, epollop->nevents, timeout);

  if (res == -1)
    {
      if (errno != EINTR)
        {
          event_warn ("epoll_wait");
          return (-1);
        }

      /* Interrupted: handle signals and report a clean return. */
      evsignal_process (base);

      return (0);
    }

  else if (base->sig.evsignal_caught)
    {
      evsignal_process (base);
    }

  event_debug ( ("%s: epoll_wait reports %d", __func__, res) );

  for (i = 0; i < res; i++)
    {
      int what = events[i].events;
      /* data.ptr is the struct evepoll stored by epoll_add/epoll_del */
      struct evepoll *evep = events[i].data.ptr;
      struct event *evread = NULL, *evwrite = NULL;

      if (what & (EPOLLHUP | EPOLLERR) )
        {
          /* Hang-up or error: wake both the reader and the writer. */
          evread = evep->evread;
          evwrite = evep->evwrite;
        }

      else
        {
          if (what & EPOLLIN)
            evread = evep->evread;

          if (what & EPOLLOUT)
            evwrite = evep->evwrite;
        }

      if (evread != NULL)
        event_active (evread, EV_READ, 1);

      if (evwrite != NULL)
        event_active (evwrite, EV_WRITE, 1);
    }

  return (0);
}


/*
 * Register an event with the backend.
 *
 * arg - the struct epollop of this backend.
 * ev  - event to add.  Signal events are passed on to evsignal_add();
 *       for all others ev->ev_fd is registered for the EV_READ and/or
 *       EV_WRITE conditions set in ev->ev_events.
 *
 * If the descriptor already has a reader or writer registered, the
 * existing interest is kept and the kernel entry is modified instead of
 * added.
 *
 * Returns 0 on success (or the result of evsignal_add() for signals)
 * and -1 on failure: negative descriptor (errno is set to EBADF), fd
 * table growth failure, or epoll_ctl() failure.
 */
int
epoll_add (void *arg, struct event *ev)
{
  struct epollop *epollop = arg;
  struct epoll_event epev =
    {
      0, {0}
    };
  struct evepoll *evep;
  int fd, op, events;

  if (ev->ev_events & EV_SIGNAL)
    return (evsignal_add (ev) );

  fd = ev->ev_fd;

  /*
   * A negative descriptor must never be used as a table index;
   * epoll_ctl() would reject it with EBADF anyway.
   */
  if (fd < 0)
    {
      errno = EBADF;
      return (-1);
    }

  if (fd >= epollop->nfds)
    {
      /* Extend the file descriptor array as necessary */
      if (epoll_recalc (ev->ev_base, epollop, fd) == -1)
        return (-1);
    }

  evep = &epollop->fds[fd];

  op = EPOLL_CTL_ADD;
  events = 0;

  /* Preserve interest that is already registered for this fd. */
  if (evep->evread != NULL)
    {
      events |= EPOLLIN;
      op = EPOLL_CTL_MOD;
    }

  if (evep->evwrite != NULL)
    {
      events |= EPOLLOUT;
      op = EPOLL_CTL_MOD;
    }

  if (ev->ev_events & EV_READ)
    events |= EPOLLIN;

  if (ev->ev_events & EV_WRITE)
    events |= EPOLLOUT;

  /* epoll_dispatch() reads the table entry back from data.ptr. */
  epev.data.ptr = evep;
  epev.events = events;

  if (epoll_ctl (epollop->epfd, op, fd, &epev) == -1)
    return (-1);

  /* Update events responsible */
  if (ev->ev_events & EV_READ)
    evep->evread = ev;

  if (ev->ev_events & EV_WRITE)
    evep->evwrite = ev;

  return (0);
}

/*
 * Remove an event from the backend.
 *
 * arg - the struct epollop of this backend.
 * ev  - event to remove.  Signal events are passed on to evsignal_del().
 *
 * When the event covers only one direction and the other direction
 * still has an event registered on the same descriptor, the kernel
 * entry is modified to keep that remaining interest; otherwise the
 * descriptor is removed from the epoll set.
 *
 * Returns 0 on success, or when the descriptor lies outside the table
 * (negative or >= nfds) and therefore cannot be registered; returns -1
 * when epoll_ctl() fails.  The table entry is cleared before epoll_ctl()
 * is called, so it stays cleared even on failure.
 */
int
epoll_del (void *arg, struct event *ev)
{
  struct epollop *epollop = arg;
  struct epoll_event epev =
    {
      0, {0}
    };
  struct evepoll *evep;
  int fd, events, op;
  int needwritedelete = 1, needreaddelete = 1;

  if (ev->ev_events & EV_SIGNAL)
    return (evsignal_del (ev) );

  fd = ev->ev_fd;

  /*
   * Descriptors outside the table were never registered.  The lower
   * bound check keeps a negative fd from indexing (and writing) in
   * front of the table.
   */
  if (fd < 0 || fd >= epollop->nfds)
    return (0);

  evep = &epollop->fds[fd];

  op = EPOLL_CTL_DEL;

  events = 0;

  if (ev->ev_events & EV_READ)
    events |= EPOLLIN;

  if (ev->ev_events & EV_WRITE)
    events |= EPOLLOUT;

  if ( (events & (EPOLLIN | EPOLLOUT) ) != (EPOLLIN | EPOLLOUT) )
    {
      if ( (events & EPOLLIN) && evep->evwrite != NULL)
        {
          /* Dropping the reader only: keep the writer registered. */
          needwritedelete = 0;
          events = EPOLLOUT;
          op = EPOLL_CTL_MOD;
        }

      else if ( (events & EPOLLOUT) && evep->evread != NULL)
        {
          /* Dropping the writer only: keep the reader registered. */
          needreaddelete = 0;
          events = EPOLLIN;
          op = EPOLL_CTL_MOD;
        }
    }

  epev.events = events;

  epev.data.ptr = evep;

  if (needreaddelete)
    evep->evread = NULL;

  if (needwritedelete)
    evep->evwrite = NULL;

  if (epoll_ctl (epollop->epfd, op, fd, &epev) == -1)
    return (-1);

  return (0);
}

/*
 * Tear down the backend: release signal handling state, both tables and
 * the epoll descriptor, then scrub and free the state structure itself.
 *
 * base - event base whose signal state is released.
 * arg  - the struct epollop created by epoll_init(); invalid afterwards.
 */
void
epoll_dealloc (struct event_base *base, void *arg)
{
  struct epollop *state = arg;

  evsignal_dealloc (base);

  /* free(NULL) is a no-op, so the tables need no NULL checks. */
  free (state->fds);
  free (state->events);

  if (state->epfd >= 0)
    close (state->epfd);

  memset (state, 0, sizeof (*state) );
  free (state);
}
